{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Flamingo Model Stats\n",
    "notebook to check the number of trainable and frozen parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "from flamingo_mini import FlamingoConfig, FlamingoModel\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "model loaded.\n"
     ]
    }
   ],
   "source": [
    "#config = FlamingoConfig(lm='facebook/opt-125m')\n",
    "#model = FlamingoModel(config)\n",
    "model = FlamingoModel.from_pretrained('dhansmair/flamingo-tiny')\n",
    "model.train()\n",
    "print('model loaded.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_named_parameters_trainable(named_params):\n",
    "    return [(name, tensor) for name, tensor in named_params if tensor.requires_grad]\n",
    "    \n",
    "    \n",
    "def print_nicely(big_number):\n",
    "    \"\"\"print with thousands-blocks separated\"\"\"\n",
    "    return '{:,}'.format(big_number).replace(',', ' ')\n",
    "    \n",
    "    \n",
    "def find_redundant(l):\n",
    "    dups, uniq = [], []\n",
    "    seen = set()\n",
    "\n",
    "    for x in l:\n",
    "        if x not in seen:\n",
    "            uniq.append(x)\n",
    "            seen.add(x)\n",
    "        else:\n",
    "            dups.append(x)\n",
    "            \n",
    "    return dups, uniq\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# General Stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "state_dict = list(model.flamingo.state_dict().keys())\n",
    "state_dict_trainable = list(model.flamingo.state_dict_trainable().keys())\n",
    "\n",
    "parameters = list(model.parameters())\n",
    "named_parameters = list(w for w, t in model.flamingo.named_parameters())\n",
    "parameters_trainable = list(model.parameters_trainable())\n",
    "named_parameters_trainable = list(w for w, t in get_named_parameters_trainable(model.flamingo.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "length state_dict: 797\n",
      "length state_dict_trainable: 209\n",
      "length parameters: 795\n",
      "length named_parameters: 795\n",
      "length parameters_trainable: 209\n",
      "length named_parameters_trainable: 209\n"
     ]
    }
   ],
   "source": [
    "print('length state_dict:', len(state_dict))\n",
    "print('length state_dict_trainable:', len(state_dict_trainable))\n",
    "print('length parameters:', len(parameters))\n",
    "print('length named_parameters:', len(named_parameters))\n",
    "print('length parameters_trainable:', len(parameters_trainable))\n",
    "print('length named_parameters_trainable:', len(named_parameters_trainable))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['vision_encoder.vision_model.embeddings.class_embedding',\n",
       " 'vision_encoder.vision_model.embeddings.position_ids',\n",
       " 'vision_encoder.vision_model.embeddings.patch_embedding.weight',\n",
       " 'vision_encoder.vision_model.embeddings.position_embedding.weight',\n",
       " 'vision_encoder.vision_model.pre_layrnorm.weight',\n",
       " 'vision_encoder.vision_model.pre_layrnorm.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.post_layernorm.weight',\n",
       " 'vision_encoder.vision_model.post_layernorm.bias',\n",
       " 'lm.decoder.embed_positions.weight',\n",
       " 'lm.decoder.final_layer_norm.weight',\n",
       " 'lm.decoder.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.0.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.0.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.0.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.0.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.0.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.0.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.1.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.1.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.1.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.1.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.1.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.1.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.2.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.2.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.2.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.2.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.2.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.2.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.3.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.3.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.3.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.3.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.3.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.3.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.4.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.4.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.4.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.4.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.4.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.4.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.5.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.5.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.5.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.5.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.5.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.5.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.6.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.6.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.6.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.6.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.6.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.6.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.7.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.7.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.7.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.7.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.7.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.7.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.8.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.8.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.8.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.8.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.8.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.8.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.9.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.9.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.9.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.9.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.9.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.9.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.10.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.10.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.10.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.10.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.10.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.10.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.11.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.11.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.11.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.11.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.11.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.11.lm_block.final_layer_norm.bias',\n",
       " 'lm_head.weight']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[p for p in state_dict if p not in state_dict_trainable]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['resampler.latents',\n",
       " 'resampler.time_pos_emb',\n",
       " 'resampler.layers.0.0.norm_media.weight',\n",
       " 'resampler.layers.0.0.norm_media.bias',\n",
       " 'resampler.layers.0.0.norm_latents.weight',\n",
       " 'resampler.layers.0.0.norm_latents.bias',\n",
       " 'resampler.layers.0.0.to_q.weight',\n",
       " 'resampler.layers.0.0.to_k.weight',\n",
       " 'resampler.layers.0.0.to_v.weight',\n",
       " 'resampler.layers.0.0.to_out.weight',\n",
       " 'resampler.layers.0.1.0.weight',\n",
       " 'resampler.layers.0.1.0.bias',\n",
       " 'resampler.layers.0.1.1.weight',\n",
       " 'resampler.layers.0.1.3.weight',\n",
       " 'resampler.layers.1.0.norm_media.weight',\n",
       " 'resampler.layers.1.0.norm_media.bias',\n",
       " 'resampler.layers.1.0.norm_latents.weight',\n",
       " 'resampler.layers.1.0.norm_latents.bias',\n",
       " 'resampler.layers.1.0.to_q.weight',\n",
       " 'resampler.layers.1.0.to_k.weight',\n",
       " 'resampler.layers.1.0.to_v.weight',\n",
       " 'resampler.layers.1.0.to_out.weight',\n",
       " 'resampler.layers.1.1.0.weight',\n",
       " 'resampler.layers.1.1.0.bias',\n",
       " 'resampler.layers.1.1.1.weight',\n",
       " 'resampler.layers.1.1.3.weight',\n",
       " 'resampler.layers.2.0.norm_media.weight',\n",
       " 'resampler.layers.2.0.norm_media.bias',\n",
       " 'resampler.layers.2.0.norm_latents.weight',\n",
       " 'resampler.layers.2.0.norm_latents.bias',\n",
       " 'resampler.layers.2.0.to_q.weight',\n",
       " 'resampler.layers.2.0.to_k.weight',\n",
       " 'resampler.layers.2.0.to_v.weight',\n",
       " 'resampler.layers.2.0.to_out.weight',\n",
       " 'resampler.layers.2.1.0.weight',\n",
       " 'resampler.layers.2.1.0.bias',\n",
       " 'resampler.layers.2.1.1.weight',\n",
       " 'resampler.layers.2.1.3.weight',\n",
       " 'resampler.layers.3.0.norm_media.weight',\n",
       " 'resampler.layers.3.0.norm_media.bias',\n",
       " 'resampler.layers.3.0.norm_latents.weight',\n",
       " 'resampler.layers.3.0.norm_latents.bias',\n",
       " 'resampler.layers.3.0.to_q.weight',\n",
       " 'resampler.layers.3.0.to_k.weight',\n",
       " 'resampler.layers.3.0.to_v.weight',\n",
       " 'resampler.layers.3.0.to_out.weight',\n",
       " 'resampler.layers.3.1.0.weight',\n",
       " 'resampler.layers.3.1.0.bias',\n",
       " 'resampler.layers.3.1.1.weight',\n",
       " 'resampler.layers.3.1.3.weight',\n",
       " 'resampler.layers.4.0.norm_media.weight',\n",
       " 'resampler.layers.4.0.norm_media.bias',\n",
       " 'resampler.layers.4.0.norm_latents.weight',\n",
       " 'resampler.layers.4.0.norm_latents.bias',\n",
       " 'resampler.layers.4.0.to_q.weight',\n",
       " 'resampler.layers.4.0.to_k.weight',\n",
       " 'resampler.layers.4.0.to_v.weight',\n",
       " 'resampler.layers.4.0.to_out.weight',\n",
       " 'resampler.layers.4.1.0.weight',\n",
       " 'resampler.layers.4.1.0.bias',\n",
       " 'resampler.layers.4.1.1.weight',\n",
       " 'resampler.layers.4.1.3.weight',\n",
       " 'resampler.layers.5.0.norm_media.weight',\n",
       " 'resampler.layers.5.0.norm_media.bias',\n",
       " 'resampler.layers.5.0.norm_latents.weight',\n",
       " 'resampler.layers.5.0.norm_latents.bias',\n",
       " 'resampler.layers.5.0.to_q.weight',\n",
       " 'resampler.layers.5.0.to_k.weight',\n",
       " 'resampler.layers.5.0.to_v.weight',\n",
       " 'resampler.layers.5.0.to_out.weight',\n",
       " 'resampler.layers.5.1.0.weight',\n",
       " 'resampler.layers.5.1.0.bias',\n",
       " 'resampler.layers.5.1.1.weight',\n",
       " 'resampler.layers.5.1.3.weight',\n",
       " 'resampler.norm.weight',\n",
       " 'resampler.norm.bias',\n",
       " 'lm.decoder.embed_tokens.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.0.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.0.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.0.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.0.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.0.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.1.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.1.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.1.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.1.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.1.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.2.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.2.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.2.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.2.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.2.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.3.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.3.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.3.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.3.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.3.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.4.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.4.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.4.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.4.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.4.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.5.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.5.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.5.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.5.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.5.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.6.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.6.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.6.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.6.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.6.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.7.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.7.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.7.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.7.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.7.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.8.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.8.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.8.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.8.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.8.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.9.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.9.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.9.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.9.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.9.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.10.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.10.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.10.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.10.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.10.xattn_block.ffw.3.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.alpha_attn',\n",
       " 'lm.decoder.layers.11.xattn_block.alpha_ffw',\n",
       " 'lm.decoder.layers.11.xattn_block.attn.norm.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.attn.norm.bias',\n",
       " 'lm.decoder.layers.11.xattn_block.attn.to_q.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.attn.to_kv.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.attn.to_out.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.ffw.0.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.ffw.0.bias',\n",
       " 'lm.decoder.layers.11.xattn_block.ffw.1.weight',\n",
       " 'lm.decoder.layers.11.xattn_block.ffw.3.weight']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# trainable parameters\n",
    "# Bare last expression, so the notebook renders the value as this cell's result.\n",
    "# NOTE(review): presumably the names of parameters with requires_grad=True;\n",
    "# the variable is built in an earlier cell - confirm there.\n",
    "named_parameters_trainable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['vision_encoder.vision_model.embeddings.class_embedding',\n",
       " 'vision_encoder.vision_model.embeddings.patch_embedding.weight',\n",
       " 'vision_encoder.vision_model.embeddings.position_embedding.weight',\n",
       " 'vision_encoder.vision_model.pre_layrnorm.weight',\n",
       " 'vision_encoder.vision_model.pre_layrnorm.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.0.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.1.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.2.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.3.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.4.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.5.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.6.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.7.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.8.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.9.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.10.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.11.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.12.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.13.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.14.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.15.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.16.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.17.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.18.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.19.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.20.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.21.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.22.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.k_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.k_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.v_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.v_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.q_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.q_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.out_proj.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.self_attn.out_proj.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc1.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc1.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.mlp.fc2.bias',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm2.weight',\n",
       " 'vision_encoder.vision_model.encoder.layers.23.layer_norm2.bias',\n",
       " 'vision_encoder.vision_model.post_layernorm.weight',\n",
       " 'vision_encoder.vision_model.post_layernorm.bias',\n",
       " 'lm.decoder.embed_positions.weight',\n",
       " 'lm.decoder.final_layer_norm.weight',\n",
       " 'lm.decoder.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.0.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.0.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.0.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.0.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.0.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.0.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.0.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.1.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.1.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.1.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.1.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.1.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.1.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.1.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.2.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.2.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.2.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.2.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.2.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.2.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.2.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.3.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.3.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.3.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.3.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.3.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.3.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.3.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.4.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.4.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.4.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.4.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.4.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.4.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.4.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.5.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.5.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.5.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.5.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.5.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.5.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.5.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.6.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.6.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.6.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.6.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.6.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.6.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.6.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.7.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.7.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.7.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.7.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.7.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.7.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.7.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.8.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.8.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.8.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.8.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.8.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.8.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.8.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.9.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.9.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.9.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.9.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.9.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.9.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.9.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.10.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.10.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.10.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.10.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.10.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.10.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.10.lm_block.final_layer_norm.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.k_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.k_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.v_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.v_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.q_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.q_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.out_proj.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn.out_proj.bias',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn_layer_norm.weight',\n",
       " 'lm.decoder.layers.11.lm_block.self_attn_layer_norm.bias',\n",
       " 'lm.decoder.layers.11.lm_block.fc1.weight',\n",
       " 'lm.decoder.layers.11.lm_block.fc1.bias',\n",
       " 'lm.decoder.layers.11.lm_block.fc2.weight',\n",
       " 'lm.decoder.layers.11.lm_block.fc2.bias',\n",
       " 'lm.decoder.layers.11.lm_block.final_layer_norm.weight',\n",
       " 'lm.decoder.layers.11.lm_block.final_layer_norm.bias']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# parameters that are frozen\n",
    "[p for p in named_parameters if p not in named_parameters_trainable]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Total number of parameters in the model\n",
    "Note that the model does not include the vision encoder, so its parameters are not part of the counts below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "params trainable: 180 312 856\n",
      "params total: 570 123 032\n"
     ]
    }
   ],
   "source": [
    "# query both counts from the model: every parameter, and the trainable subset only\n",
    "num_params_total = model.num_parameters(only_trainable=False)\n",
    "num_params_trainable = model.num_parameters(only_trainable=True)\n",
    "\n",
    "# report trainable first, then total, with thousands blocks separated\n",
    "for label, count in [('params trainable:', num_params_trainable),\n",
    "                     ('params total:', num_params_total)]:\n",
    "    print(label, print_nicely(count))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "params resampler: 63 023 104\n"
     ]
    }
   ],
   "source": [
    "# add up the element counts of all parameter tensors under model.flamingo.resampler\n",
    "num_resampler_params = sum(\n",
    "    param.numel() for param in model.flamingo.resampler.parameters()\n",
    ")\n",
    "print('params resampler:', print_nicely(num_resampler_params))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 ('venv')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "a5f5701800bdd14e24e97aedd482a8c969e987af6fa283f4999757dcb417784b"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
